Question 1: Find huffman Code

  1. File Compression Write a program that on input a simple ASCII text

file does the following:

• Finds all the characters appearing in the file.
• Counts the frequencies of all the characters.
• Constructs and shows the Huffman code corresponding to the computed
frequencies.
• Encodes your file using the code (Hint: Bitwise operator).
• Computes the compression ratio Output File size / Input File size 

In [59]:
def generate_random_text_file(length = 200, debug = False):
    """generate a text file with random string"""
    text_file = open("./random_text.txt","w")
    import random, string
    random_string = ''.join([random.choice(string.ascii_letters + string.digits) for n in xrange(length)])
    if debug: print random_string
    text_file.write(random_string)
    text_file.close()
    
def load_file_char_freq(path_to_file= "./random_text.txt", debug = False):
    """Load a file, and get frequencies of each character. Out is a dictionary in form {"char": 'freq'}"""
    path = raw_input("Relative path to text file. Leave blank for to generate random text: ")
    if path is "": generate_random_text_file()
    else: path_to_file = path
    
    try:   text_file = open(path_to_file,"r")
    except: 
        print "File not found"
        raise IOError("No file found with that name")
    
    text = text_file.read()
    list_of_chars = list(text)
    dict_of_chars= {}
    for char in list_of_chars:
        try: dict_of_chars[char] +=1
        except: dict_of_chars[char] = 1
    if debug: 
        print dict_of_chars
        print text

    text_file.close()
    return dict_of_chars, text

def calculate_compression(old_string, new_string):
    """Calculates ratio, which is nothing but ratio of string lengths. Since huffman stores in binary, we must devided by 8."""
    return len(new_string)/len(old_string) / 8.0 * 100.0

def huffman_encode(dict_of_chars, debug = False):
    """Huffman encode the given dict mapping symbols to weights
    Leaf is of form [ [freq, [symbol, code]]. Python gives min heap from via Heapify function"""
    
    heap = [[frequency, [char, ""]] for char, frequency in dict_of_chars.items()]   #Construct un Heapified heap
    heapify(heap)  # Python's In place Minheap
    if debug:  print "Trying to Encode: ", sorted(dict_of_chars.iteritems())
    while len(heap) > 1:
        small_node = heappop(heap)         # Get smallest of  element from Min Heap
        big_node = heappop(heap)           # Get second smallest  element
        
        # The idea is to dynamically generate codes. If node is pushed down-left, add a Zero. If pushed to down-right, add a one
        
        for node in small_node[1:]:       
            node[1] = '0' + node[1]
            
        for node in big_node[1:]:
            node[1] = '1' + node[1]
        
        # New node has sum of freq as new freq. All within subtree follow freq in list. Thier order is determined by thier code.
        new_node = [small_node[0] + big_node[0]] + small_node[1:] + big_node[1:]  
        
        # Push to min heap
        heappush(heap, new_node)
    
   
    #Return all leaves, which are already in list format [ Symbol, code]
    return heappop(heap)[1:]

The 'true' Huffman code creator assembles each symbol and its weight into the following structure initially (the leaf structure):

[ weight, [ symbol, []]]

The weight applies to every (in this case only one), of the

[symbol, []]

pairs after it in the same list. The empty list is used to accumulate the Huffman code for the symbol as we manipulate the heap, without having to walk a constructed tree structure.

Source: from Paddy3118 Go deh!: Huffman Encoding in Python http://paddy3118.blogspot.com/2009/03/huffman-encoding-in-python.html#ixzz3XJn3fTAK


In [60]:
from heapq import heappush, heappop, heapify

dict_of_chars, input_string = load_file_char_freq()
codes  = huffman_encode(dict_of_chars)

print "\n\nSymbol\t | Frequency\t | Huffman Code"
print "--------------------------------------"
for code in codes:
    print "%s \t\t %s \t\t %s" % (code[0], dict_of_chars[code[0]], code[1])
    
huff_code_dict = {}
for code in codes: huff_code_dict[code[0]] = code[1]
    

new_string= ''
for symbol in list(input_string):
    new_string += huff_code_dict[symbol]

print "\n\n------------- Calculating Compression Ratio--------------------------"
print "Original Char length: ", len(input_string) 
print "Original Binary length: ", str(len(input_string) * 8.0)
print "New Binary length: ", len(new_string) 
print "Compression Ratio is: ",calculate_compression(input_string, new_string),"%"


Relative path to text file. Leave blank for to generate random text: 


Symbol	 | Frequency	 | Huffman Code
--------------------------------------
B 		 5 		 00000
I 		 5 		 00001
O 		 5 		 00010
Y 		 5 		 00011
Z 		 5 		 00100
a 		 5 		 00101
c 		 5 		 00110
d 		 5 		 00111
k 		 5 		 01000
m 		 5 		 01001
y 		 2 		 010100
0 		 3 		 010101
1 		 3 		 010110
6 		 3 		 010111
A 		 3 		 011000
D 		 3 		 011001
F 		 3 		 011010
J 		 3 		 011011
P 		 6 		 01110
W 		 3 		 011110
e 		 3 		 011111
i 		 6 		 10000
j 		 3 		 100010
n 		 3 		 100011
o 		 6 		 10010
p 		 6 		 10011
q 		 3 		 101000
x 		 3 		 101001
t 		 6 		 10101
C 		 7 		 10110
z 		 3 		 101110
2 		 2 		 1011110
3 		 1 		 10111110
5 		 1 		 10111111
7 		 1 		 11000000
8 		 1 		 11000001
E 		 1 		 11000010
H 		 1 		 11000011
G 		 4 		 110001
K 		 2 		 1100100
R 		 2 		 1100101
L 		 4 		 110011
M 		 4 		 110100
N 		 4 		 110101
Q 		 4 		 110110
S 		 4 		 110111
T 		 1 		 11100000
U 		 1 		 11100001
V 		 2 		 1110001
X 		 4 		 111001
b 		 2 		 1110100
f 		 2 		 1110101
h 		 4 		 111011
l 		 2 		 1111000
r 		 2 		 1111001
s 		 2 		 1111010
u 		 1 		 11110110
w 		 1 		 11110111
v 		 4 		 111110
9 		 5 		 111111


------------- Calculating Compression Ratio--------------------------
Original Char length:  200
Original Binary length:  1600.0
New Binary length:  1151
Compression Ratio is:  62.5 %

Question 2: King of the Tournament

There are n players in a tournament. Each player plays against every other player. Thus, there are nC2 matches in the tournament. There are no draw in the matches. We say that player p1 subdued p2 if p1 defeated p2 or p1 defeated some player p3 who defeated p2. Note that p1 and p2 might have subdued each other. A king in the tournament is a player who has subdued every other player

Write a program that given the results of a tournament finds if it has a king, if yes then it should also output a king.

• Model the problem as a graph theoretic problem.
• Decide a good data structure to store the results of the matches.
• The run-time of your program should be O(n^2).

Answer:

Problem can be easily solved by making players as nodes, and edges as matches. Direction of edge represents outcome of match.

We run BFS from each node. If after BFS all nodes are visited, the Node is a king.

Assumptions:

* A player cannot defeat himself/ herself.
* Graph is rep by Python dictionary, where player is key, adjacency list is value.

In [14]:
from random import randint
def create_edge(tournament, winner, loser):

    tournament[winner].append( loser)
    return tournament

def generate_random_tournament_data(num_players = 10):
    """Just a simple function that simulates all the matches, randomly, outputs in Dict format"""
    i,j=0,0
    #Create list of players
    players = [ "Player "+str(num) for num in range(num_players)]
    
    tournament_data = {}
    for player in players: tournament_data[player] = []
    
    for i in range(len(players)):
        for oponent in players[i:]:
            if oponent is  players[i]: continue
            #print players[i], " vs ", oponent
            result = randint(0,1)
            if result ==0: tournament_data = create_edge(tournament_data, players[i], oponent )
            else: tournament_data = create_edge(tournament_data,  oponent, players[i] )
   
    return tournament_data

def breadth_first_search(graph, start):
    """ We use BFS to verify given player is king"""
    visited, queue = set(), [start]
    while queue:
        vertex = queue.pop(0)
        if vertex not in visited:
            visited.add(vertex)
            queue.extend(graph[vertex] - visited)

    return visited

def find_king(graph):
    """ Player with maximum outdegree is a king. Runtime O(n^2)"""
    max_out_degree=0
    king = None
    for player in graph.keys():
        curr_out_degree = len(graph[player])
        if curr_out_degree > max_out_degree:
            max_out_degree = curr_out_degree
            king = player
    return king

In [13]:
num_player = input("Please input number of players in tournament:")

tournament_dict = generate_random_tournament_data(num_player)
print "\n---------------------------------"
print " Player  \t |  Match WOn Against"
print "---------------------------------"
for player in tournament_dict.keys():
    print player, "\t", tournament_dict[player]
    tournament_dict[player] = set(tournament_dict[player])
    
print "\n\n----------------------\n", "Finding Kings\n", "----------------------\n"


king = None
king = find_king(tournament_dict)

# Verifying that king is infact a king, Runtime O(n^2)
visited = breadth_first_search(tournament_dict, king)

        
if king is  None: print "No King"
else: 
    print king, " is the King with ", len(visited)," subjugations."


Please input number of players in tournament:5

---------------------------------
 Player  	 |  Match WOn Against
---------------------------------
Player 4 	['Player 1', 'Player 3']
Player 3 	[]
Player 2 	['Player 0', 'Player 3', 'Player 4']
Player 1 	['Player 2', 'Player 3']
Player 0 	['Player 1', 'Player 3', 'Player 4']


----------------------
Finding Kings
----------------------

Player 4 2 2
Player 2 3 3
Player 2  is the King with  5  subjugations.

In [6]:



Out[6]:
0

In [5]:


In [5]:


In [ ]: